Subgroup Identification Analysis

GBSG Example

Author

Larry Leon

Code
# Set options
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  fig.align = 'center',
  fig.retina = 2
)
rm(list=ls())
library(tinytex)
Warning: package 'tinytex' was built under R version 4.5.2
Code
library(ggplot2)

#library(table1)

library(gt)

library(survival)
library(data.table)
library(randomForest)
library(grf)
library(policytree)
library(DiagrammeR)

#library(grid)
#library(forestploter)
#library(randomizr)

# library(devtools)
# install_github("larry-leon/weightedsurv", force = TRUE)
#install.packages("weightedsurv")
# install_github("larry-leon/forestsearch", force = TRUE)

library(forestsearch)
library(weightedsurv)

# Set theme for plots
theme_set(theme_minimal(base_size = 12))

1 Summary

Reproducing main GBSG analysis

1.1 Datasetup

Code
df.analysis <- gbsg
df.analysis <- within(df.analysis,{
id <- as.numeric(c(1:nrow(df.analysis)))  
# time to months
time_months <- rfstime/30.4375
grade3 <- ifelse(grade=="3",1,0)
treat <- hormon
})
confounders.name <- c("age","meno","size","grade3","nodes","pgr","er")
outcome.name <- c("time_months")
event.name <- c("status")
id.name <- c("id")
treat.name <- c("hormon")

1.2 Kaplan-Meier curves and baseline summary

Code
dfcount <- df_counting(
  df = df.analysis,
  by.risk = 6,
  tte.name = outcome.name, 
  event.name = event.name, 
  treat.name = treat.name
)
plot_weighted_km(dfcount, conf.int = TRUE, show.logrank = TRUE, ymax = 1.05, xmed.fraction = 0.775, ymed.offset = 0.125)

Code
create_summary_table(data = df.analysis, treat_var = treat.name, 
                     table_title = "GBSG Characteristics by Treatment Arm",
                                      vars_continuous=c("age","nodes","size","er","pgr"),
                                      vars_categorical=c("grade","grade3"),
                                      font_size = 12)
GBSG Characteristics by Treatment Arm
Characteristic Control (n=440) Treatment (n=246) P-value1 SMD2
age Mean (SD) 51.1 (10.0) 56.6 (9.4) <0.001 0.57
nodes Mean (SD) 4.9 (5.6) 5.1 (5.3) 0.665 0.03
size Mean (SD) 29.6 (14.4) 28.8 (14.1) 0.470 0.06
er Mean (SD) 79.7 (124.2) 125.8 (191.1) <0.001 0.30
pgr Mean (SD) 102.0 (170.0) 124.3 (249.7) 0.213 0.11
grade 0.273 0.06
1 48 (10.9%) 33 (13.4%)
2 281 (63.9%) 163 (66.3%)
3 111 (25.2%) 50 (20.3%)
grade3 0.174 0.05
0 329 (74.8%) 196 (79.7%)
1 111 (25.2%) 50 (20.3%)
1 P-values: t-test for continuous, chi-square/Fisher's exact for categorical/binary variables
2 SMD = Standardized mean difference (Cohen's d for continuous, Cramer's V for categorical)

1.3 GRF analysis

Code
## GRF
grf_est1 <- grf.subg.harm.survival(data=df.analysis,
confounders.name = confounders.name,
outcome.name=outcome.name, event.name=event.name, id.name=id.name, treat.name=treat.name,
maxdepth = 2, n.min = 60, dmin.grf = 12, frac.tau=0.6, details=TRUE)
tau, maxdepth = 46.75811 2 
   leaf.node control.mean control.size control.se depth
1          2         6.49        82.00       3.34     1
2          3        -4.10       604.00       1.06     1
11         4        -7.90       112.00       2.81     2
21         5         3.86       177.00       1.87     2
4          7        -5.89       356.00       1.33     2

Selected subgroup:
  leaf.node control.mean control.size control.se depth
1         2         6.49        82.00       3.34     1

GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "er <= 0"

All splits:
[1] "er <= 0"   "age <= 50" "age <= 43"
Code
# NOTE: In general for GRF trees
# leaf1 --> recommend control
# leaf2 --> recommend treatment
# Tree depth 1
plot(grf_est1$tree1,leaf.labels=c("Control","Treat"))
Code
# Tree depth 2
plot(grf_est1$tree2,leaf.labels=c("Control","Treat"))

1.4 Forestsearch with depth=2 (maxk = 2)

Code
# Setup parallel processing
library(doFuture)
library(doRNG)

registerDoFuture()
registerDoRNG()

system.time({fs <- forestsearch(df.analysis,  confounders.name = confounders.name,
                                outcome.name = "time_months", treat.name = "hormon", event.name = "status", id.name = "id",
                                potentialOutcome.name = NULL, 
                                df.test = NULL,
                                flag_harm.name = NULL,
                                hr.threshold = 1.25, hr.consistency = 1.0, pconsistency.threshold = 0.90,
                                sg_focus = "maxSG", max_subgroups_search = 30,
                                use_twostage = TRUE,
                                showten_subgroups = TRUE, details=TRUE,
                                conf_force = NULL,
                                cut_type = "default", use_grf = TRUE, plot.grf = TRUE, use_lasso = FALSE,
                                maxk = 2, fs.splits = 1000,
                                n.min = 60, d0.min = 10, d1.min = 10,
                                plot.sg = TRUE, by.risk = 6,
                                parallel_args = list(plan="callr", workers = 30, show_message = TRUE)
)
})

=== Two-Stage Consistency Evaluation Enabled ===
Stage 1 screening splits: 30 
Maximum total splits: 1000 
Batch size: 20 
================================================

GRF stage for cut selection with dmin, tau = 12 0.6 
tau, maxdepth = 46.75811 2 
   leaf.node control.mean control.size control.se depth
1          2         6.49        82.00       3.34     1
2          3        -4.10       604.00       1.06     1
11         4        -7.90       112.00       2.81     2
21         5         3.86       177.00       1.87     2
4          7        -5.89       356.00       1.33     2

Selected subgroup:
  leaf.node control.mean control.size control.se depth
1         2         6.49        82.00       3.34     1

GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "er <= 0"

All splits:
[1] "er <= 0"   "age <= 50" "age <= 43"
GRF cuts identified: 3 
  Cuts: er <= 0, age <= 50, age <= 43 
# of continuous/categorical characteristics 5 2 
Continuous characteristics: age size nodes pgr er 
Categorical characteristics: meno grade3 
Default cuts included (1st 20) age <= mean(age) age <= median(age) age <= qlow(age) age <= qhigh(age) size <= mean(size) size <= median(size) size <= qlow(size) size <= qhigh(size) nodes <= mean(nodes) nodes <= median(nodes) nodes <= qlow(nodes) nodes <= qhigh(nodes) pgr <= mean(pgr) pgr <= median(pgr) pgr <= qlow(pgr) pgr <= qhigh(pgr) er <= mean(er) er <= median(er) er <= qlow(er) er <= qhigh(er) 
Categorical: meno grade3 
Factors per GRF: er <= 0 age <= 50 age <= 43 
Initial GRF cuts included er <= 0 age <= 50 age <= 43 

===== CONSOLIDATED CUT EVALUATION (IMPROVED) =====
Evaluating 25 cut expressions once and caching...
Cut evaluation summary:
  Total cuts:  25 
  Valid cuts:  25 
  Errors:  0 
✓ All 25 factors validated as 0/1
===== END CONSOLIDATED CUT EVALUATION =====

# of candidate subgroup factors= 25 
 [1] "er <= 0"      "age <= 50"    "age <= 43"    "age <= 53.1"  "age <= 53"   
 [6] "age <= 46"    "age <= 61"    "size <= 29.3" "size <= 25"   "size <= 20"  
[11] "size <= 35"   "nodes <= 5"   "nodes <= 3"   "nodes <= 1"   "nodes <= 7"  
[16] "pgr <= 110"   "pgr <= 32.5"  "pgr <= 7"     "pgr <= 131.8" "er <= 96.3"  
[21] "er <= 36"     "er <= 8"      "er <= 114"    "meno"         "grade3"      
Number of possible configurations (<= maxk): maxk = 2 , # combinations = 1275 
Events criteria: control >= 10 , treatment >= 10 
Subgroup search completed in 0.03 minutes
Found 25 subgroup candidate(s)
# of candidate subgroups (meeting all criteria) = 25 
Removed 1 near-duplicate subgroups
Original rows: 25 
After removal: 24 
# of unique initial candidates: 24 
# Restricting to top stop_Kgroups = 30 
# of candidates to evaluate: 24 
Algorithm: Two-stage sequential 
  Stage 1 splits: 30 
  Screen threshold: 0.763 
  Max total splits: 1000 
  Batch size: 20 
Parallel processing: callr with 30 workers

*** Subgroup found: {er <= 0} 
% consistency criteria met= 0.95 
SG focus= maxSG 
Subgroup Consistency Minutes= 0.053 
Algorithm used: Two-stage sequential 
Candidates evaluated: 24 
Candidates passed: 7 
Subgroup found (FS) with sg_focus='maxSG'
Selected subgroup: {er <= 0} 
Minutes forestsearch overall = 0.09 
Consistency algorithm used: twostage 
   user  system elapsed 
 23.424   1.773   5.109 
Code
plan("sequential")


# Results for estimation (training) data, which_df = "est" is default
res_tabs <- sg_tables(fs, ndecimals = 3, which_df = "est")

res_tabs$sg10_out
Identified Subgroups
Two-factor subgroups (maxk=2)
Factor 1 Factor 2 N Events E1 HR Pcons
{er <= 0} 82 45 16 1.951 0.950
{er <= 0} {pgr <= 32.5} 75 41 16 2.222 1.000
{er <= 0} !{age <= 43} 68 38 14 2.164 0.960
{er <= 0} {pgr <= 7} 64 34 13 1.992 0.920
{er <= 0} {size <= 35} 61 34 15 2.537 0.970
{er <= 0} {nodes <= 7} 61 31 11 2.335 0.950
{er <= 0} !{size <= 20} 61 35 12 2.054 0.930
Search Configuration: Single-factor candidates (L) = 50; Maximum combinations evaluated = 1,275; Search depth (maxk) = 2
Search Results: Candidate subgroups found = 25; Maximum HR estimate = 2.54
Note: E1 = events in treatment arm; Pcons = consistency proportion
Code
res_tabs$tab_estimates
Treatment Effect Estimates
Training data estimates
Subgroup n n1 events m1 m0 RMST HR (95% CI)
ITT 686 (100.0%) 246 (35.9%) 299 (43.6%) 66.3 50.2 7.8 0.69 (0.54, 0.89)
Questionable 82 (12.0%) 26 (31.7%) 45 (54.9%) 22.9 43.7 -14 1.95 (1.04, 3.67)
Recommend 604 (88.0%) 220 (36.4%) 254 (42.1%) 66.7 52.6 9.3 0.61 (0.47, 0.80)

1.5 Bootstrap Inference

Code
#output_dir <- "dev/vignettes-working/applications/gbsg/results"
output_dir <- "results/"
save_results <- dir.exists(output_dir)
# File pre-fix for saving
fileout_boot <- c("gbsg-k2_v4_B=100")
fileout_cv <- c("gbsg-k2_v4_CV=20")


# patchhwork needed for a combined bootstrap plot (otherwise if not avaialable will not produce)
library(patchwork)

# Number of bootstrap samples
NB <- 1000

system.time({fs_bc <- forestsearch_bootstrap_dofuture(
  fs.est = fs, 
  nb_boots = NB, 
  show_three = FALSE, 
  details = TRUE)
})
Ystar matrix generated should be 'boots x N': 1000 x 686

ForestSearch parameters for bootstrap iterations:
  - sg_focus: maxSG 
  - maxk: 2 
  - fs.splits: 1000 
  - max_subgroups_search: 30 
  - hr.threshold: 1.25 
  - hr.consistency: 1 
  - pconsistency.threshold: 0.9 
  - n.min: 60 
  - use_twostage: TRUE 
  - use_lasso: FALSE 
  - use_grf: TRUE 
  Bootstrap-specific overrides:
  - grf_res: NULL (forces re-selection)
  - grf_cuts: NULL (forces re-selection)
  - parallel_args: sequential (prevents nested parallelism)
  - details: FALSE (suppressed in workers)
  - plot.sg: FALSE
  - plot.grf: FALSE

=== Bootstrap Analysis Complete ===
Success rate: 92.9% (929/1000)

H (Questionable) Estimates:
  Unadjusted:       1.95 (1.04,3.67) 
  Bias-corrected:  1.61 (0.84,3.08) 

Hc (Recommend) Estimates:
  Unadjusted:       0.61 (0.47,0.8) 
  Bias-corrected:  0.66 (0.46,0.95) 
===================================
     user    system   elapsed 
11183.886   206.971   885.608 
Code
plan("sequential")


if (save_results) {
    filename <- file.path(output_dir, 
                         paste0(fileout_boot, 
                                ".RData"))
    save(df.analysis, fs, fs_bc, file = filename)
    cat("\nResults saved to:", filename, "\n")
}

Results saved to: results//gbsg-k2_v4_B=100.RData 

1.5.1 Diagnostics and Summaries

Code
#load("~/Documents/GitHub/forestsearch/vignettes/results/sim_gbsg_example_B=1000.RData")

output_dir <- "results/"

load_results <- dir.exists(output_dir)
if(load_results){
filename <- file.path(output_dir, 
                         paste0(fileout_boot,".RData"))

load(file = filename)
}

summaries <- summarize_bootstrap_results(
      sgharm = fs$sg.harm,
      boot_results = fs_bc,
      create_plots = TRUE,
      est.scale = "hr"
    )

===============================================================
           BOOTSTRAP ANALYSIS SUMMARY                          
===============================================================

BOOTSTRAP SUCCESS METRICS:
-------------------------------------------------------------
  Total iterations:              1000
  Successful subgroup ID:        929 (92.9%)
  Failed to find subgroup:       71 (7.1%)

TIMING ANALYSIS:
-------------------------------------------------------------
Overall:
  Total bootstrap time:          14.73 minutes (0.25 hours)
  Average per iteration:         0.01 min (0.9 sec)

Per-iteration timing:
  Mean:                          0.19 min (11.5 sec)
  Median:                        0.19 min (11.3 sec)
  Std Dev:                       0.08 minutes
  Range:                         [0.04, 0.47] minutes
  IQR:                           [0.13, 0.24] minutes

ForestSearch timing (successful iterations only):
  Iterations with FS:            1000 (100.0%)
  Mean FS time:                  0.19 min (11.5 sec)
  Median FS time:                0.19 min (11.2 sec)
  Total FS time:                 191.92 minutes
  FS time % of total:            1302.8%

Overhead timing (Cox models, bias correction, etc.):
  Mean overhead:                 0.00 min (0.0 sec)
  Median overhead:               0.00 min (0.0 sec)
  Total overhead:                0.21 minutes
  Overhead % of total:           1.4%

PERFORMANCE ASSESSMENT:
-------------------------------------------------------------
  Performance rating:            ✓✓✓ Excellent
  Average iteration speed:       0.9 seconds

===============================================================
Code
sg_tab <- summaries$table

sg_tab
Treatment Effect by Subgroup
Bootstrap bias-corrected estimates (1000 iterations)
Subgroup
Sample Size
Survival
Treatment Effect
N NT Events MedT MedC RMSTd HR
(95% CI)1
HR
(95% CI)2
Qstnbl 82 (12.0%) 26 (31.7%) 45 (54.9%) 22.9 43.7 -14 1.95 (1.04, 3.67) 1.61 (0.84,3.08)
Recmnd 604 (88.0%) 220 (36.4%) 254 (42.1%) 66.7 52.6 9.3 0.61 (0.47, 0.80) 0.66 (0.46,0.95)
1 Unadjusted HR: Standard Cox regression hazard ratio with robust standard errors
2 Bias-corrected HR: Bootstrap-adjusted estimate using infinitesimal jacknife method (1000 iterations). Corrects for optimism in subgroup selection.
Note: Med = Median survival time (months). RMSTd = Restricted mean survival time difference. Subgroup identified in 92.9% of bootstrap samples.
Code
event_summary <- summarize_bootstrap_events(fs_bc, threshold = 12)

=== Bootstrap Event Count Summary ===
Total bootstrap iterations: 1000
Event threshold: <12 events

ORIGINAL Subgroup H on BOOTSTRAP samples:
  Control arm <12 events: 0 (0.0%)
  Treatment arm <12 events: 0 (0.0%)
  Either arm <12 events: 0 (0.0%)

ORIGINAL Subgroup Hc on BOOTSTRAP samples:
  Control arm <12 events: 0 (0.0%)
  Treatment arm <12 events: 0 (0.0%)
  Either arm <12 events: 0 (0.0%)

NEW Subgroups found: 929 (92.9%)

NEW Subgroup H* on ORIGINAL data:
  Control arm <12 events: 9 (1.0% of successful)
  Treatment arm <12 events: 19 (2.0% of successful)
  Either arm <12 events: 27 (2.9% of successful)

NEW Subgroup Hc* on ORIGINAL data:
  Control arm <12 events: 0 (0.0% of successful)
  Treatment arm <12 events: 2 (0.2% of successful)
  Either arm <12 events: 2 (0.2% of successful)
Code
summaries$diagnostics_table_gt
Bootstrap Diagnostics Summary
Analysis of 1000 bootstrap iterations
Category Metric Value
Success Rate1 Total iterations 1000
Successful subgroup ID 929 (92.9%)
Failed to find subgroup 71 (7.1%)
Success rating Excellent ✓✓✓
Subgroup H (Questionable) Unadjusted estimate 1.95 (1.04, 3.67)
Bias-corrected estimate 1.61 (0.84, 3.08)
Bias correction impact2 17.7%
CI width change3 2.64 -> 2.24
Subgroup Hc (Recommend) Unadjusted estimate 0.61 (0.47, 0.80)
Bias-corrected estimate 0.66 (0.46, 0.95)
Bias correction impact2 7.2%
CI width change3 0.33 -> 0.49
Bootstrap Quality: H Valid iterations 929
Mean (SD) 0.47 (0.39)
Coefficient of variation4 83.1%
Skewness5 -0.01
Bootstrap Quality: Hc Valid iterations 929
Mean (SD) -0.42 (0.22)
Coefficient of variation4 52.7%
Skewness5 0.18
Search Performance Mean max HR found 3.24 (1.26)
Mean factors evaluated 47.8
Mean combinations tried 1173
Proportion at maxk --
1 Success Rate: Proportion of bootstrap samples where ForestSearch identified a valid subgroup
2 Bias Correction Impact: Percentage change from unadjusted to bias-corrected estimate
3 CI Width Change: Confidence interval width before -> after bias correction
4 Coefficient of Variation: Standard deviation as % of mean (lower is better)
5 Skewness: Measure of asymmetry (0 = symmetric, |skew| < 1 is generally good)
Interpretation Guide:

Excellent stability: Subgroup is consistently identified across bootstrap samples.

High variability: Bootstrap estimates are imprecise (CV >= 25%). Consider increasing nb_boots or sample size.

Code
summaries$subgroup_summary$original_agreement
                            Metric       Value
                            <char>      <char>
1:      Total bootstrap iterations        1000
2:           Successful iterations         929
3: Failed iterations (no subgroup)          71
4:       Exact match with original   38 (4.1%)
5:         Different from original 891 (95.9%)
Code
summaries$subgroup_summary$factor_presence
  Rank Factor Count   Percent
6    1    pgr   517 55.651238
7    2   size   383 41.227126
1    3    age   365 39.289559
2    4     er   305 32.831001
5    5  nodes   101 10.871905
4    6   meno    61  6.566200
3    7 grade3    45  4.843918
Code
summaries$subgroup_summary$factor_presence_specific
    Rank Base_Factor Factor_Definition Count  Percent
182    1          er         {er <= 8}    95 10.22605
94     2        size     !{size <= 25}   118 12.70183
Code
summaries$plots$combined

1.6 Forest Search n-fold cross-validation

Code
output_dir <- "results/"
load_results <- dir.exists(output_dir)
if(load_results){
filename <- file.path(output_dir, 
                         paste0(fileout_boot,".RData"))

load(file = filename)
}

# Kfolds = n (default to n-fold cross-validations)

fs_OOB <- NULL

fs_OOB <- forestsearch_Kfold(fs.est = fs, details = TRUE,
                             parallel_args = list(plan = "callr", workers = 36, show_message = TRUE))
Cross-validation setup:
  - Observations: 686 
  - Folds: 686 
  - Fold sizes (range): 1-1 

ForestSearch parameters for CV folds:
  - sg_focus: maxSG 
  - maxk: 2 
  - fs.splits: 1000 
  - max_subgroups_search: 30 
  - hr.threshold: 1.25 
  - hr.consistency: 1 
  - pconsistency.threshold: 0.9 
  - n.min: 60 
  - use_twostage: TRUE 
  - use_lasso: FALSE 
  - use_grf: TRUE 
  - (per-fold parallel: sequential)
  - (per-fold details: FALSE)
  - (per-fold plot.sg: FALSE)

Cross-validation complete:
  - Time: 8.66 minutes
  - Subgroup found in 98.5 % of folds
Any found: 0.9854227 
Exact match: 0.9620991 
At least 1 match: 0.9620991 
Cov 1 any: 0.9825073 
Cov 2 any: NaN 
Cov 1 and 2 any: 0 
Cov 1 exact: 0.9620991 
Cov 2 exact: NaN 
Agreement (sens, ppv) in H and Hc: 0.8292683 0.9917219 0.9315068 0.9771615 
Code
# Reset workers to single
plan(sequential)

summary_OOB <- forestsearch_KfoldOut(res=fs_OOB, details=TRUE, outall=TRUE)
Any found: 0.9854227 
Exact match: 0.9620991 
At least 1 match: 0.9620991 
Cov 1 any: 0.9825073 
Cov 2 any: NaN 
Cov 1 and 2 any: 0 
Cov 1 exact: 0.9620991 
Cov 2 exact: NaN 
Agreement (sens, ppv) in H and Hc: 0.8292683 0.9917219 0.9315068 0.9771615 
        Subgroup        n              n1            m1     m0     RMST 
Overall "ITT"           "686 (100.0%)" "246 (35.9%)" "66.3" "50.2" "7.8"
FA_0    "Not recommend" "82 (12.0%)"   "26 (31.7%)"  "22.9" "43.7" "-14"
KfA_0   "Not recommend" "73 (10.6%)"   "21 (28.8%)"  NA     "42.9" "11" 
FA_1    "Recommend"     "604 (88.0%)"  "220 (36.4%)" "66.7" "52.6" "9.3"
KfA_1   "Recommend"     "613 (89.4%)"  "225 (36.7%)" "66.2" "55"   "6.4"
        Hazard ratio       
Overall "0.69 (0.54, 0.89)"
FA_0    "1.95 (1.04, 3.67)"
KfA_0   "0.53 (0.22, 1.25)"
FA_1    "0.61 (0.47, 0.80)"
KfA_1   "0.72 (0.56, 0.93)"
Code
table(summary_OOB$SGs_found[,1])

   !{age <= 43}   !{size <= 25} !{size <= 29.3}       {er <= 0}       {er <= 8} 
              2               4               4             660               6 
Code
table(summary_OOB$SGs_found[,2])

        !{meno}   !{size <= 25} !{size <= 29.3}     {age <= 47}       {er <= 8} 
              3               1               2               2               8 
    {pgr <= 33} 
              1 
Code
Ksims <- 200

fs_ten <- forestsearch_tenfold(fs.est = fs, sims = Ksims, Kfolds = 10, details = TRUE, 
                       parallel_args = list(plan = "callr", workers = 36, show_message = TRUE))
Starting repeated K-fold cross-validation:
  - Simulations: 200 
  - Folds per simulation: 10 
  - Workers: 13 

ForestSearch parameters for CV folds:
  - sg_focus: maxSG 
  - maxk: 2 
  - fs.splits: 1000 
  - max_subgroups_search: 30 
  - hr.threshold: 1.25 
  - hr.consistency: 1 
  - pconsistency.threshold: 0.9 
  - n.min: 60 
  - use_twostage: TRUE 
  - use_lasso: FALSE 
  - use_grf: TRUE 
  - (per-fold parallel: sequential)
  - (per-fold details: FALSE)
  - (per-fold plot.sg: FALSE)

Repeated K-fold CV complete:
  - Time: 20.1 minutes
  - Successful simulations: 200 / 200 
  - Projected hours per 100 sims: 0.17 
Code
# Reset workers to single
plan(sequential)

print(fs_ten$find_summary)
       Any      Exact At least 1       Cov1       Cov2  Cov 1 & 2 Cov1 exact 
       0.8        0.2        0.2        0.5         NA        0.0        0.2 
Cov2 exact 
        NA 
Code
print(fs_ten$sens_summary)
   sens_H   sens_Hc     ppv_H    ppv_Hc 
0.4024390 0.9304636 0.4401670 0.9202306 
Code
print(head(fs_ten$sens_out))
        sens_H   sens_Hc     ppv_H    ppv_Hc
[1,] 0.3780488 0.9519868 0.5166667 0.9185304
[2,] 0.4024390 0.8956954 0.3437500 0.9169492
[3,] 0.3170732 0.9221854 0.3561644 0.9086460
[4,] 0.5121951 0.9337748 0.5121951 0.9337748
[5,] 0.2926829 0.9536424 0.4615385 0.9085174
[6,] 0.2804878 0.9221854 0.3285714 0.9042208
Code
print(head(fs_ten$find_out))
     Any Exact At least 1 Cov1 Cov2 Cov 1 & 2 Cov1 exact Cov2 exact
[1,] 0.7   0.3        0.3  0.4  NaN         0        0.3        NaN
[2,] 0.8   0.2        0.2  0.3  NaN         0        0.2        NaN
[3,] 0.8   0.1        0.1  0.5  NaN         0        0.1        NaN
[4,] 0.9   0.2        0.2  0.7  NaN         0        0.2        NaN
[5,] 0.5   0.2        0.2  0.4  NaN         0        0.2        NaN
[6,] 0.7   0.1        0.1  0.3  NaN         0        0.1        NaN
Code
# Save all results

output_dir <- "results/"
save_results <- dir.exists(output_dir)

if (save_results) {
    filename <- file.path(output_dir, 
                         paste0(fileout_cv, 
                                ".RData"))
    save(df.analysis, fs, fs_bc, fs_ten, fs_OOB, file = filename)
    cat("\nResults saved to:", filename, "\n")
}

Results saved to: results//gbsg-k2_v4_CV=20.RData 
Code
output_dir <- "results/"
load_results <- dir.exists(output_dir)
if(load_results){
filename <- file.path(output_dir, 
                         paste0(fileout_cv,".RData"))

load(file = filename)
}



#' # Define subgroups to display
subgroups <- list(
 age_gt65 = list(
 subset_expr = "age > 65",
 name = "age > 65",
     type = "reference"
   ),
 age_lt65 = list(
 subset_expr = "age <= 65",
 name = "age <= 65",
     type = "reference"
   ),
pgr_positive = list(
 subset_expr = "pgr > 0",
 name = "pgr > 0",
     type = "reference"
   ),
pgr_negative = list(
 subset_expr = "pgr <= 0",
 name = "pgr <= 0",
     type = "reference"
   )
  )


# Create the forest plot
 result <- plot_subgroup_results_forestplot(
   fs_results = list(fs.est = fs, fs_bc = fs_bc, fs_OOB = fs_OOB, fs_kfold = fs_ten),
   df_analysis = df.analysis,
   subgroup_list = subgroups,
   outcome.name = "time_months",
   event.name = "status",
   treat.name = "hormon",
   E.name = "Hormon",
   C.name = "CT",
   ci_column_spaces = 25
 )

# Display the plot
plot(result$plot)